{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !wget https://raw.githubusercontent.com/huseinzol05/Malaya-Dataset/master/dictionary/dialect/kelantan.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "kelantan = pd.read_csv('kelantan.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import malaya\n",
    "\n",
    "malays = malaya.texts._malay_words._malay_words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "from unidecode import unidecode\n",
    "\n",
    "def cleaning(string):\n",
    "    \"\"\"Normalise text into lower-case ASCII separated by single spaces.\n",
    "\n",
    "    The input is transliterated with ``unidecode``; every run of characters\n",
    "    other than ASCII letters, quotes, hyphens, slashes and spaces is replaced\n",
    "    by one space; the result is lower-cased, space-collapsed and stripped.\n",
    "\n",
    "    Args:\n",
    "        string: Text to normalise.\n",
    "\n",
    "    Returns:\n",
    "        The normalised string (may be empty).\n",
    "    \"\"\"\n",
    "    string = unidecode(string).replace('.', '. ').replace(',', ' , ')\n",
    "    # The hyphen sits last in the character class so it is literal without a\n",
    "    # backslash; a backslash-hyphen inside a non-raw string literal is an\n",
    "    # invalid escape sequence and triggers a warning on modern Python.\n",
    "    string = re.sub('[^\\'\"A-Za-z/ -]+', ' ', string)\n",
    "    string = re.sub(r'[ ]+', ' ', string.lower()).strip()\n",
    "    return string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tokenise every entry of column '0' of the dialect CSV into cleaned words.\n",
    "words = []\n",
    "for entry in kelantan['0']:\n",
    "    # Skip non-string cells explicitly (presumably NaN from empty rows --\n",
    "    # TODO confirm) instead of hiding every possible error behind a bare\n",
    "    # `except: pass`, which would also mask e.g. a missing '0' column.\n",
    "    if not isinstance(entry, str):\n",
    "        continue\n",
    "    words.extend(cleaning(entry).split())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# http://w3hafizm.blogspot.com/2010/11/kamus-kelantan-loghat-kelate.html\n",
    "\n",
    "additional = \"\"\"\n",
    "agah / hagah – sombongan\n",
    "api stok(api stop)- traffic light\n",
    "api stop -traffic light\n",
    "asore bodi – insuran kereta / motora\n",
    "awe – panggilan manja kepada orang lelaki\n",
    "awe sejambo lebak - satu watak dalam drama main tteri\n",
    "basah jjerok – basah kuyup / lecun\n",
    "bbageh – kaedah pengubatan tradisi / jampib\n",
    "baloh – berkelahi\n",
    "bbini – berkawinb\n",
    "bojeng – guting rambutb\n",
    "borak – merebak\n",
    "bedua (sebutan bunyi dihidung) -rasa ingin marah / benci\n",
    "bekwoh – kenduri (mungkin berasal dari perkataan Inggeris – big work)\n",
    "belabik – (siapa tahu tolong jelaskan)\n",
    "belengah – melekit\n",
    "betak – kenyang\n",
    "betap wak – lembap\n",
    "biru kketu – biru yang amat sangat\n",
    "blebe – berleter\n",
    "bocah / boceh – buncit\n",
    "bojeng – fesyen rambut (contoh: bojeng 1-2 = rambut hampir botak)\n",
    "bok – buku (asal perkataan Iggeris – book)\n",
    "bokali – barangkali\n",
    "bokbong – cempelai (spesis musang)\n",
    "brona – rosak / budak yang menangis tak berhenti\n",
    "buah spelek – sesuatu (teknik) yg istimewa\n",
    "buah topoh – buah epal (diambil dr perkataan arab “tuffah” = epal)\n",
    "buah zabik – kismis\n",
    "buat nyayo – menyiksa\n",
    "buje / oghe buje – janda\n",
    "busuk banga – terlalu busuk\n",
    "busuk kohong – terlalu busuk\n",
    "butak – perut buncit\n",
    "ca – air yang bertakung di bawah atau tepi rumah\n",
    "cah keting – bentes\n",
    "cok – cangkul\n",
    "ccerok – perut berbunyi bila lapar\n",
    "cceruk – potong rumput untuk haiwan\n",
    "cebok – cedok (selalunya untuk cecair, mencedok)\n",
    "cema clote / cpolok – terlalu kotor\n",
    "cepelak – lancang\n",
    "cerah craca – terang benderang\n",
    "cerah perut – cirit birit\n",
    "cliko – curi tulang / pemalas\n",
    "cokek makbolok – cucuk pinggang kawan dengan jari telunjuk dgn tujuan bergurau\n",
    "comel llote – amat cantik\n",
    "cuwoh – curah\n",
    "dale so – pusat hiburan di Kota Bharu zaman dulu (asal perkataan Inggeris – show)\n",
    "ddasing – menari (asal perkataan Inggeris – dancing)\n",
    "dderak / bederak – suka merayau / berjalan\n",
    "debek – teruk (asal perkataan Inggeris – “the bad”)\n",
    "deh? – memohon restu atau persetujuan, lebih kurang “ok?”,\n",
    "dekpong gak eh – kalau ya pun\n",
    "dermo basikal – asal perkataan Inggeris -”dynamo”\n",
    "dok cckoh – duduk bercangkung\n",
    "dok kene (sebutan English – doc care nay) – ya bukan / “isn’t it”\n",
    "dok ko? – ya tak?\n",
    "dok? – lebih kurang “bukan?”, “betul tak?” atau “isn’t it?”\n",
    "duga / luga – tak sedap perut /loya (sebutan ikut daerah)\n",
    "gaduh – nak cepat\n",
    "gak – lebih kurang “habis tu”\n",
    "gdebe – berani / samsing\n",
    "gege – bising / riuh rendah biasanya ada bunyi ketawa (asal perkataan Inggeris – giggle) – contohnya: jange ggege gak! = don’t giggle! Please\n",
    "gelebek – biasanya terjadi pada mata pisau/parang bila kerat benda keras (tumpul gelebek – sangat tumpul).\n",
    "gelembong boya – kueh daripada beras pulut (species dodol)\n",
    "gelenyar / gletah – merenyam, getik\n",
    "gelega – lantai\n",
    "gemuk ddebok – terlalu gemok\n",
    "genyeh – tenyeh\n",
    "geretak – jambatan\n",
    "getah sokmo – kueh daripada ubi keledek\n",
    "getek – juga\n",
    "ggapo – apa\n",
    "ggatih teksi – kayuh beca\n",
    "ggocoh – bertumbuk\n",
    "ggoghi – bagi memulakan sesuatu\n",
    "ghak – semak\n",
    "ghohok = sukar, susah, payah\n",
    "goba = risau\n",
    "gong – tolol\n",
    "gonyoh – gosok dgn kuat\n",
    "griak – kahak\n",
    "guano – lagu mana / macam mana\n",
    "hapok kohong – bau hampak\n",
    "hnja – tendang\n",
    "ho (sebutan bunyi dihidung) – “ya lah..” atau “yes”\n",
    "honda samah – honda cub 50cc\n",
    "hungga – berlari\n",
    "ike kkhonge – ikan cencaru\n",
    "istek – ladang (asal perkataan Inggeris – estate)\n",
    "jamah – pegang / sentuh\n",
    "jatuh celabok – jatuh berteraburan\n",
    "jebat – bau yang dikeluar oleh binatang untuk menanda kawasannya (contoh: jebat musang) – kepada orang Melaka… maaf lah ya\n",
    "jebbeng – berjambang\n",
    "jebeh – mencebek\n",
    "jebo – botol kaca\n",
    "jelira – sedap / kena dgn slera\n",
    "jellaq – tamak / orang kepalaparan dapat makanan\n",
    "jemba – berjumpa / sua\n",
    "jemeleh – sembelih (contoh: tak jemeleh ko? = tak sembelih lembu ke? )\n",
    "jemore – lantai basah, gelegar bulu atau batang pinang – rumah kampung dulu-dulu\n",
    "jenera – lena\n",
    "jerkoh – sergah\n",
    "jjolor – menjulur\n",
    "jjughuh = baik (jjughuh budok tu = baik budak itu)\n",
    "jong kako = tukang angkat hidangan makan / penanggah)\n",
    "kabil – salah satu teknik mendayung sampan\n",
    "kaki sbaye – kuku jari kaki yg rosak dan berbau\n",
    "karya basikal – tempat letak barang di belakang basikal (asal perkataan Inggeris – “carrier”)kasut bok = kasut jenis “boot”\n",
    "kayae – bisanya digunakan untuk tanda kawasan rumput yang nak disabit\n",
    "kdolok – lawak\n",
    "kebek – membuka ruang menjadi lebih besar\n",
    "kecek – pujuk / goda\n",
    "kekoh – gigit\n",
    "kelaring – kotor (asal perkataan Inggeris – colouring)\n",
    "kelong/belong – tipu muslihat\n",
    "kelorek – kedekut\n",
    "kenye bboyah – terlalu kenyang\n",
    "kepek idung - kueh puteri mandi\n",
    "kereta plek – kereta sewa / teksi\n",
    "kerlong – greedy / tenong\n",
    "kesit – sunyi (lonely)\n",
    "ketik – gigit kecil\n",
    "ketik ttunga – kurus / kecil serta pucat\n",
    "ketok bodi – buat / baiki badan kenderaan\n",
    "kkecek – bercakap\n",
    "kketei – kantin\n",
    "klikpah-klikpah – terpinga pinga\n",
    "kodi – tidak berkualiti\n",
    "ko’o – ketawa berdekah-dekah\n",
    "kota – cukai jalan atau insurans (contoh: kreta kawe tak dok kota )\n",
    "kuca hanya / kuca lana – berteraboran\n",
    "kuda – sepak\n",
    "kuk / kok – sekeh\n",
    "kuning nnehe / llehe – terlalu kuning\n",
    "kupik – kedekut\n",
    "lamoke – nanti kan\n",
    "lari kecik ppala-ppala – lari terlalu laju / lintang pukang\n",
    "lecah – payau atau becak (contoh: “toksoh lah awe…lecah” maksudnya ” tak usah lah ya … payau”\n",
    "leweh – kurang solid\n",
    "lipotei – tidak tetap duduk / ke sana ke sini\n",
    "lobey – gila-gila atau bengong\n",
    "loleh – tak serious\n",
    "lorong tua (sebutan bunyi dihidung) – kawasan pelacuran di Kota Bharu zaman dulu (dah tak ada lagi sekarang ni)\n",
    "mahkamah tinggi ayoh kob – mahkamah tinggi high court\n",
    "main tteri – drama tari pengubatan tradisi\n",
    "mamba – tok segar / tegar\n",
    "manih lleting – terlalu manis\n",
    "mase ppughik – terlalu masam\n",
    "masin ppeghak – terlalu masin\n",
    "mek – panggilan manja untuk orang perempuan\n",
    "merket – pasar (asal perkataan Inggeris – market)\n",
    "mmeda – buang air besar\n",
    "mmupo – mandi sungai\n",
    "mokte – rambutan\n",
    "ngga – tolol\n",
    "nghele – menghadiri kenduri\n",
    "ngidung – sengau\n",
    "ngusuk – terakhir\n",
    "nnakut / penakut apah – terlalu penakut\n",
    "nnate – binatang (kadangkala “simbol” kemesraan antara kawan)\n",
    "nnawak – bohong\n",
    "nneja – pengurus (asal perkataan Inggeris – Manager)\n",
    "nneting – melantun\n",
    "nngapo – meracaun\n",
    "nungei nyor – “somersault” – kadang-kadang dibuat perumpamaan frust nungei nyor (frust somersault la tu!!)\n",
    "nnusuk – sembunyi (main nnusuk – main hide and seek)\n",
    "nnyaba – tak kuat\n",
    "nnyaca – terhuyung hayang nak jatuh\n",
    "nok ssega angin – metodologi pengubatan cara tarditional contohnya main puteri\n",
    "nyace – kayu atau besi yang dipacak ke tanah..tambak lembu\n",
    "nyapong – carut\n",
    "nyayo – kesian\n",
    "nyior koter – kelapa tua\n",
    "pah? – lepas itu?\n",
    "pahit llepe – lerlalu pahit\n",
    "pakddahak – tanda silang atau pangkah\n",
    "papok – bapok / pondan\n",
    "patat siput – kemahiran (skill) contoh: tak dak patat siput – maknanya tak ada kemahiran\n",
    "pecah peda – kentut\n",
    "pekong – baling\n",
    "pelepong lembu – paru-paru lembu\n",
    "penampa tawa – penampar percuma\n",
    "pengah – gedik / getik (greedy)\n",
    "perone – tempat membuang / bakar sampah\n",
    "perut besar – mengandung\n",
    "petong – baling\n",
    "pitu gek – pintu pagar (gate)\n",
    "plungo – kayu atau besi tajam untuk tambat lembu\n",
    "pok ko – spesis mengkarung tapi boleh memanjat\n",
    "pozek – bayar muka (asal perkataan Inggeris – deposit)\n",
    "ppala bakul – cukai yang dikenakan oleh majlis bandaran ke atas peniaga\n",
    "ppala bubus – kepala botak\n",
    "ppatak – paling bawah\n",
    "ppiyah – ketayap / kopiah\n",
    "prebet sapu – teksi sapu\n",
    "prekso – peperiksaan\n",
    "pungga – baling\n",
    "putih ssueh – terlalu putih\n",
    "putung kalong – batang kayu untuk dibaling\n",
    "ralek – leka\n",
    "rauk muka – sapu muka\n",
    "redas – cakap laser / baling\n",
    "rhoyat – bagitahu / maklumkan\n",
    "rhukah – panjat\n",
    "rima – harimau\n",
    "rizat – keputusan (asal perkataan Inggeris – result)\n",
    "roba – getah pemadam ( asal perkataan Inggeris – rubber)\n",
    "sa – satu\n",
    "sabik – sebab\n",
    "saing – kawan\n",
    "saksoba – penyerap hentak (asal perkataan Inggeris – sock absorber)\n",
    "samah – 50 sen\n",
    "samah seghia – pendapatan kecil untuk sara anak isteri\n",
    "saru – serabut\n",
    "satu sut – berpakaian lengkap\n",
    "seghia dua - pendapatan kecik untuk sara anak isteri\n",
    "seh inguh – hembus hingus\n",
    "selareh – selalu\n",
    "sele-bele – tak kemas / cuai\n",
    "sengeleng – sengaja\n",
    "senyap tipah – tak ada khabar berita\n",
    "sgeto – kawasan “keras” – ada penunggu\n",
    "sghia – RM1.00\n",
    "sia – sembuh dari luka (asal perkataan Inggeris – seal)\n",
    "sleke – silakan\n",
    "smaye ssejid – sembahyang di masjid\n",
    "smeesek – terlalu mudah\n",
    "smuta – kain lilit kepala\n",
    "sobek – hias\n",
    "sokmo – sentiasa\n",
    "sopeh – serpih\n",
    "ssong – sesuai (tak ssong – tak sesuai)\n",
    "ssumba – pewarna untuk makanan/minuman\n",
    "suku – berasingan (bungkus suku – bungkus asing-asing)\n",
    "supik – beg plastiksupik gelenyar /supik rhokrhak – beg plastik yang nipis\n",
    "suwih – swis (asal perkataan Inggeris – switch)\n",
    "tak cakno – tak hirau\n",
    "tak mmado – tak peduli / tak padan\n",
    "tak pok – tak cukup cerdik\n",
    "tak ppaka – tidak terpakai (tak pakka benda – tak menjadi)\n",
    "tak rak – tak mampu\n",
    "tanggong – lebih kurang “ssong” – sesuai\n",
    "tawar heber – terlalu tawar\n",
    "tepoh – langgar\n",
    "tohok – buang\n",
    "tok bageh – kepala upaca bbageh\n",
    "tok ggawa – ketua daerah\n",
    "tok kerani – kerani di pejabat\n",
    "tok laki – suami\n",
    "tok mindok – tukang gesek rebab main tteri atau mok yong\n",
    "tok nebeng – ketua kampung\n",
    "tok nngulu – ketua mukim\n",
    "tok peraih – berniaga kecil-kecilan di pasar\n",
    "tok ppeti – mufti\n",
    "tok tteri – kepala upacara main puteritonye – ejek dgn memek muka\n",
    "ttino – betina / perempuan (oghe ttino kawe = isteri saya)\n",
    "ttino garik – perempuan jalang ( disebut bila dalam keadaan marah)\n",
    "ttuyup – pepatung\n",
    "tubik – keluar\n",
    "tuke ttesen – tukang tulis / taip petisyen\n",
    "tunja – tendang\n",
    "turik – berdesing pendengaran – sakit hati, marah\n",
    "wak nganyi – perli / ejek dgn kata-kata\n",
    "wok lor – tolol\n",
    "yak!! – lebih kurang “opocot!!!” atau “oops!!”\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Collect the headword(s) of every 'headword - definition' line in `additional`.\n",
    "for line in additional.split('\\n'):\n",
    "    if not len(line):\n",
    "        continue\n",
    "    # Transliterate first so the en dash used as a separator becomes '-'.\n",
    "    entry = unidecode(line)\n",
    "    # Cut at the first '-' or '=' that has whitespace on at least one side, so\n",
    "    # hyphenated headwords ('sele-bele') stay whole and the definition of\n",
    "    # '='-separated lines ('goba = risau') is not swallowed into the headword.\n",
    "    parts = re.split(r'\\s+[-=]\\s*|\\s*[-=]\\s+', entry, maxsplit=1)\n",
    "    if len(parts) == 1:\n",
    "        # Tightly written entries without spaces: cut at the first bare hyphen.\n",
    "        parts = entry.split('-', 1)\n",
    "    # A headword may list several '/'-separated variants ('agah / hagah').\n",
    "    words.extend(cleaning(variant) for variant in parts[0].split('/'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://www.facebook.com/AnakPerantauanKelate/posts/kamus-loghat-kelantanacu-tra-teh-cubaagah-berlagakalik-sebelahambak-kejarambo-sa/1109775215739242/\n",
    "\n",
    "additional = \"\"\"\n",
    "acu tra teh - cuba\n",
    "agah - berlagak\n",
    "alik - sebelah\n",
    "ambak - kejar\n",
    "ambo - saya\n",
    "apung - apam balik\n",
    "abe-abang\n",
    "apo-apa\n",
    "baju gumbea - baju melayu\n",
    "bakpo - kenapa\n",
    "bbera - berpecah\n",
    "bberong - crowded\n",
    "bBobok - meracau\n",
    "bbol - mentol\n",
    "beca - silau\n",
    "bedo oh - melampau\n",
    "beg duit - dompet\n",
    "behe - pukul dari sisi\n",
    "bejebok - limpah\n",
    "bekeng - garang\n",
    "bekwoh - kenduri\n",
    "berat - teruk\n",
    "bidah - lastik\n",
    "bim - tidur utk baby\n",
    "biru dacing - biru bendera umno\n",
    "blana kokna - sangat banyak\n",
    "bledo - Agar-agar\n",
    "bo laa - sudah la\n",
    "bocong - botol air\n",
    "boh (boss) - ayah\n",
    "bokbong - baabon\n",
    "boktea - dibuat oleh\n",
    "bolok - selekeh\n",
    "brona - bermasalah\n",
    "buah setow - buah manggis\n",
    "buoh badminton - bulu tangkis\n",
    "buoh lanah - nenas\n",
    "buoh pauh - mangga\n",
    "buoh sawo nilo - ciku\n",
    "buoh tterea - jagus\n",
    "busuk banga - sangat busuk\n",
    "cakkelat - gula-gula\n",
    "celeng - tabung syiling\n",
    ".... dale duo lagi – tak pasti lagi\n",
    "deghak – bersiar-siar\n",
    "dekcok – main teng-teng\n",
    "demo – kamu/awak\n",
    "denu – di sana\n",
    "do..oh - melampau\n",
    "dok alik mano? – berada di mana?\n",
    "dok cace - berdiri\n",
    "dok chongok – duduk bersila\n",
    "dok jjerung mano – duduk belah mana?\n",
    "dok ttinggung - bertinggung\n",
    "drebar – memandu;pemandu\n",
    "etek – juga\n",
    "gaha - berus\n",
    "gamok - agak\n",
    "gedebe/ddebe - samseng\n",
    "gegea glegok – bising sangat\n",
    "gewe - awek\n",
    "ggaji – gergaji\n",
    "ggilo - minat\n",
    "ggocoh – gaduh\n",
    "gguling bating – berguling-guling\n",
    "ghanggoh – makan\n",
    "gheknge nnapung – sangat ringan\n",
    "ghoyak - beritahu\n",
    "ghukah - panjat\n",
    "glecoh - terseliuh\n",
    "glenya – mengada-ada\n",
    "glepar - menggelepar\n",
    "glewak - menyibuk\n",
    "gligo - permata\n",
    "godio – apa dia\n",
    "golok - parang\n",
    "gongok – gigi rosak\n",
    "gonyoh - berus\n",
    "gostae - undur ke belakang\n",
    "gouk - reban/kandang\n",
    "ggorek - pengasah pensel\n",
    "gu - kawan\n",
    "guano – bagaimana;tanya keadaan\n",
    "gumbo air – sedut air\n",
    "hija pah – hijau bendera PAS\n",
    "hite legea – terlalu hitam\n",
    "hoo - ya\n",
    "huduh sepa – sangat hodoh\n",
    "igak - tangkap\n",
    "ikut ah – suka hati la\n",
    "ipung - kumpul\n",
    "jambe - tandas\n",
    "jangok – melaram\n",
    "jauh jelak – sangat jauh\n",
    "jebeh – masam muka\n",
    "jebour – botol\n",
    "jenero - tertidur\n",
    "jjerik – menangis\n",
    "jjolo – terjulur\n",
    "jok - sejak\n",
    "jjujuk - tersusun panjang\n",
    "jo ong - mendung\n",
    "jolo – betul la tu\n",
    "kaba-kaba - sedar-sedar/tiba-tiba\n",
    "kain klubung – kain tudung\n",
    "kain smaye – kain telekung\n",
    "kaing sehe – kain basahan\n",
    "kalea - pensel\n",
    "katok – puku dari atas\n",
    "kawe - saya\n",
    "kecek - bercakap\n",
    "keheak - ludah\n",
    "kerek - perigi\n",
    "keghah kekong - keras yg teramat koho/selo - perlahan/slow\n",
    "kekoh – gigit\n",
    "kelik - balik\n",
    "khapoh - sampah\n",
    "khepok gote - keropok lekor\n",
    "khetah - kertas\n",
    "kheto - kereta\n",
    "khiput - berkedut\n",
    "khobek - selongkar\n",
    "khusi - kerusi\n",
    "kito – saya(kata ganti nama bg orang muda terhadap org tua)\n",
    "kkacik - sangat rapat\n",
    "kkesok - terkalih\n",
    "kkorea – pengisar kelapa\n",
    "kkubey - berselerak\n",
    "kokok – sodok\n",
    "kokse kokdea – kucar kacir\n",
    "kona - membelok\n",
    "koya/agas - perasaan\n",
    "kube – terabur\n",
    "kuey kekoh cha – buah melaka\n",
    "lagu mano - bagaimana\n",
    "lembik – lemah\n",
    "li-lah2 – ke sana ke mari\n",
    "liut – lemau\n",
    "loghat - nak cepat\n",
    "lokpak tikea – nama sejenis makanan\n",
    "lok - biarkan\n",
    "lugar – loya\n",
    "liyk - elak\n",
    "main ceklat – main helah\n",
    "manggok – bangga diri\n",
    "manih lleting – terlalu manis\n",
    "mase purik – terlalu masin\n",
    "masin perak – terlalu masin\n",
    "mbek – kambing (panggilan untuk tarik perhatian kanak2)\n",
    "meroh merea – merah darah\n",
    "metoo - degil\n",
    "minyok mati bbunuh - minyak dagu\n",
    "mmakoh - berhujah\n",
    "mmapuh - kenakan\n",
    "mmecok - merajuk\n",
    "mmeteak – kacau; ganggu\n",
    "mmolek – buat elok-elok\n",
    "mmunoh – buat kerosakan\n",
    "mmutar itik - mencari sesuatu dengan gelabah\n",
    "moktea - rambutan\n",
    "monggek - bonceng\n",
    "mugo – panggilan utk sesuatu yg tidak diketahui namanyo\n",
    "mung - engkau\n",
    "nah – menyatakan banyak\n",
    "namo rima? – apa dia tu?\n",
    "ndow - buaian\n",
    "ngaji - belajar\n",
    "ngaju - merajuk\n",
    "ngakok – merangkak\n",
    "ngala – mewarnakan\n",
    "nganying - mempersenda\n",
    "ngejah - negotiate\n",
    "ngekkoh – akikah\n",
    "ngepek – berleter\n",
    "nguraa – conteng\n",
    "nnakak – tertarik perhatian\n",
    "nnarak kow – berlebih-lebihan\n",
    "nnawok - menipu\n",
    "nnebah - menebas\n",
    "nnepik - menjerit\n",
    "nnerak - meniarap\n",
    "nnetea – melentang\n",
    "nneting - melantun\n",
    "nnise – gula melaka\n",
    "nnoney - bergayut\n",
    "nnonye – mencebik muka\n",
    "nokpak - melompat\n",
    "nukih - melukis\n",
    "ook-aloh – menyatakan keluhan\n",
    "otokk - tertekan\n",
    "panje jolo - panjang sangat\n",
    "parowk - teruk\n",
    "patak-bawah\n",
    "penar-silau\n",
    "pe'ea - perangai\n",
    "pekdoh - faedah\n",
    "pekong - baling\n",
    "peseng – style\n",
    "peti sejok - peti ais\n",
    "pitih-duit/wang\n",
    "pleting - straw\n",
    "pparowk - selisih\n",
    "pok daro nitas-polis di depan\n",
    "pok daro-pakcik\n",
    "ppena - silau\n",
    "ppioh kopiah\n",
    "puah ea lokk – bosan\n",
    "punoh - rosak\n",
    "putih sepow – sangat putih\n",
    "putihstar - purple\n",
    "putik lingo – pengorek telinga\n",
    "relaa - tercerai\n",
    "roba- pemadam\n",
    "rrobok - almari\n",
    "rukah – panjat\n",
    "sedho – kurang baik\n",
    "segho - terasa\n",
    "segowk - menanduk\n",
    "sek – geng;kumpulan\n",
    "selok - pengsan\n",
    "seney mesek – sangat senang\n",
    "senyap ttipah – terlalu senyap\n",
    "sero – rase, agak2\n",
    "siga – tangga\n",
    "singo beng - air limau air\n",
    "ssika bmx - basikal BMX\n",
    "smeta - sekejap\n",
    "skali Arung - Sekaligus\n",
    "sejuk Ketta – sangat sejuk\n",
    "Smuta – kain serban\n",
    "srebea - kebas\n",
    "Ssakok - tersangkut\n",
    "ssaloh kaki – terseliuh kaki\n",
    "ssekong - kaku\n",
    "ssikal - basikal\n",
    "ssiko – penyodok\n",
    "ssoyok – terkoyak\n",
    "stambung - bertimbun\n",
    "sugho - bubur asyura\n",
    "suko selok - ketawa terbahak-bahak\n",
    "sungguh – kata penguat bagi sifat:comel sungguh\n",
    "siak dok ggobar - Jangan risau\n",
    "saksoba - Absorber\n",
    "tape – rakan suara\n",
    "tawar hebea – sangat tawar\n",
    "temo'o – nak sangat\n",
    "tino lawa-wanita cantik\n",
    "timu cino - tembikai\n",
    "tohor - cetek\n",
    "tohouk - buang\n",
    "tok bbutir- tak jelas\n",
    "tok cekak – tak larat\n",
    "tok kaba - tak sedar\n",
    "toksoh - jangan / tak usah\n",
    "trelak – tidur sekejap\n",
    "triok – nangis\n",
    "ttala - tertangguh\n",
    "ttbolah – tidak berhati-hati;cuai, memalukan\n",
    "ttebeng – ambil risiko\n",
    "tteke - tertekan\n",
    "ttumbok – bertumbok\n",
    "ttuyup - pepatung\n",
    "tok cakno - tak hirau\n",
    "tepoh - langgar\n",
    "tubik - keluar\n",
    "taik Aye Munea - Tahi Ayam Yang Masih Hangat\n",
    "ubi stelo - ubi keledek\n",
    "vi-bagi\n",
    "wak gapo-buAt apa\n",
    "woh nyo-buah kelapa\n",
    "yak boter-ye la tu\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "683"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect the headword(s) of every 'headword - definition' line in `additional`.\n",
    "for line in additional.split('\\n'):\n",
    "    if not len(line):\n",
    "        continue\n",
    "    # Transliterate first so the en dash used as a separator becomes '-'.\n",
    "    entry = unidecode(line)\n",
    "    # Cut at the first '-' or '=' that has whitespace on at least one side, so\n",
    "    # hyphenated headwords ('kaba-kaba') stay whole instead of being truncated\n",
    "    # at their internal hyphen.\n",
    "    parts = re.split(r'\\s+[-=]\\s*|\\s*[-=]\\s+', entry, maxsplit=1)\n",
    "    if len(parts) == 1:\n",
    "        # Tightly written entries such as 'abe-abang': cut at the first hyphen.\n",
    "        parts = entry.split('-', 1)\n",
    "    # A headword may list several '/'-separated variants ('gedebe/ddebe').\n",
    "    words.extend(cleaning(variant) for variant in parts[0].split('/'))\n",
    "\n",
    "len(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "451"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only entries longer than three characters, de-duplicate them, and\n",
    "# remove everything already contained in `malays` (presumably malaya's\n",
    "# standard Malay vocabulary -- verify against the library).\n",
    "words = {entry for entry in words if len(entry) > 3} - malays\n",
    "len(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Sort before dumping: iteration order of a set of strings differs between\n",
    "# interpreter runs (hash randomisation), so `list(words)` would write the\n",
    "# file in a different order every time the notebook is re-run.\n",
    "with open('kelantan-words.json', 'w') as fopen:\n",
    "    json.dump(sorted(words), fopen)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
